###########################
# Replication Files 
# Date: March 2024 
# JEPS: When do voters see Fraud? Evaluating the role of poll supervision on perceptions of integrity
# RStudio 2022.02.1+461 "Prairie Trillium" Release (8aaa5d470dd82d615130dbf663ace5c7992d48e3, 2022-03-17) for macOS
############################
library(cregg)
library(ggplot2)
library(readr)
library(tidyverse)
library(patchwork)
library(devtools)
library(usethis)
devtools::install_github("m-freitag/cjpowR")
library(cjpowR)
############################################ Data Preparation ##############################################
# Location of the replication dataset (fill in file path)
raw_data_path <- "/filepath/rep_full_df.csv"

# Original, unedited data frame kept only for comparison/reference
dat1 <- read_csv(raw_data_path)

# Working copy: every later step filters, relabels and extends this object
dat2 <- read_csv(raw_data_path)

######### Dropped Cases (robustness check) ##########
# Removal of conjoint runs in which the randomizer selected two forms with the
# same treatment combination (the author notes this makes no difference to the
# results).

# Sort so that the forms of one run sit next to each other (per respondent),
# then compare each form's treatment code with the previous form in the same
# (id, run) group. The first form in a group has no predecessor, so lag()
# returns NA for it. The result is ungrouped so that later operations on
# dat2_temp do not silently run per group.
dat2_temp <- dat2 %>%
  arrange(id, run, treatment) %>%
  group_by(id, run) %>%
  mutate(treatment_match = lag(treatment) == treatment) %>%
  ungroup()

# Rows whose treatment equals the preceding one within the same run.
# which() discards the NA comparisons.
dupe_check <- dat2_temp[which(dat2_temp$treatment_match), ]

# Lookup table of the (id, run) pairs that contain a duplicated treatment
temp_dictionary <- data.frame(id = dupe_check$id, run = dupe_check$run)

# Flag every row of the full data frame that belongs to a flagged (id, run)
# pair. The pairs are matched through a combined key instead of a row-by-row
# loop; rows with a missing id or run are never flagged. "\r" is used as the
# separator because it is not expected to occur inside an id or run value.
dupe_keys <- paste(temp_dictionary$id, temp_dictionary$run, sep = "\r")
row_keys <- paste(dat2$id, dat2$run, sep = "\r")
is_dupe <- !is.na(dat2$id) & !is.na(dat2$run) & row_keys %in% dupe_keys

# Column indicating whether the observation belongs to a duplicated run
dat2$dupe_check <- ifelse(is_dupe, 'dupe', 'not_dupe')

# Dropping duplicates for main analyses (should be a final dataset of 2298)
dat2 <- subset(dat2, dupe_check == 'not_dupe')

############################# Labeling Variables ##########################################

# Treatment-combination code used in later analysis. It is pasted together from
# the raw numeric indicators, so it has to be built before they are relabelled.
dat2$trt <- paste0(
  as.character(dat2$presiding),
  as.character(dat2$party1 + dat2$party2),
  as.character(dat2$observer)
)

# Relabel each feature (0 -> first label, anything else -> second label) and
# store it as a factor whose first level is the baseline.
# NOTE(review): the party2 labels carry surrounding spaces and the two crossing
# variables differ in capitalisation/trailing space, presumably so that levels
# of different features stay distinct in the plots -- later comparisons rely on
# these exact strings.
dat2$presiding <- factor(
  ifelse(dat2$presiding == 0, "PO absent", "PO present"),
  levels = c("PO absent", "PO present")
)
dat2$party1 <- factor(
  ifelse(dat2$party1 == 0, "Agent absent", "Agent present"),
  levels = c("Agent absent", "Agent present")
)
dat2$party2 <- factor(
  ifelse(dat2$party2 == 0, " Agent absent ", " Agent present "),
  levels = c(" Agent absent ", " Agent present ")
)
dat2$observer <- factor(
  ifelse(dat2$observer == 0, "Observer absent", "Observer present"),
  levels = c("Observer absent", "Observer present")
)
dat2$party1_cros <- factor(
  ifelse(dat2$party1_cros == 0, "NO Crossing", "Crossed Out"),
  levels = c("NO Crossing", "Crossed Out")
)
dat2$party2_cros <- factor(
  ifelse(dat2$party2_cros == 0, "No Crossing ", "Crossed out"),
  levels = c("No Crossing ", "Crossed out")
)

# Party grouping for each respondent: the four named parties are collapsed into
# two cross-country groups, everyone else (including missing) stays 'other'.
dat2$party3 <- 'other'
dat2$party3[dat2$party %in% c('Jubilee Alliance Party',
                              'Democratic Progressive Party (DPP)')] <- 'Jubilee/DPP'
dat2$party3[dat2$party %in% c('Orange Democratic Movement (ODM)',
                              'Malawi Congress Party (MCP)')] <- 'ODM/MCP'

# Rename the feature columns only after the steps above have been completed.
# The new names are what appears on the AMCE plots at the end.
plot_names <- c(
  presiding   = 'Presiding Officer',
  party1      = 'Jub/DPP',
  party2      = 'ODM/MCP',
  observer    = 'Observers',
  party1_cros = 'Jub/DPP Error',
  party2_cros = 'ODM/MCP Error'
)
colnames(dat2)[match(names(plot_names), colnames(dat2))] <- unname(plot_names)

# Grouping variables stored as factors
dat2$party3 <- factor(dat2$party3)
dat2$trust_EMB <- factor(dat2$trust_EMB)
dat2$err_typ <- factor(dat2$err_typ, levels = c("clean", "errors"))
# Labels are applied to the sorted unique values of obs_kq
dat2$obs_kq <- factor(
  dat2$obs_kq,
  labels = c('Low Observer Awareness', 'High Observer Awareness')
)

# Vote difference factors:
# vote_df2 is the corrected variable, vote_df the original incorrect one
dat2$vote_df2 <- factor(dat2$vote_df2, levels = c("small", "large"))
dat2$vote_df <- factor(dat2$vote_df, levels = c("small", "large"))


########################################### Main paper results ###########################################################

#################### Full sample analysis (Figure 3) ######################

# Conjoint estimates for all six features, with respondent id passed as `id`
general <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id
)

# Plot results
full_result <- plot(general)
full_result

################### Presiding Officer Analysis (Figure 4) ################
# Same model, estimated separately for each level of trust_EMB
sg_po <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~trust_EMB
)

# Removal of non-response groups
sg_po <- sg_po[sg_po$BY != "Not sure" & sg_po$BY != "Rather not say", ]

# Plot creation
po_plt <- plot(sg_po, group = 'trust_EMB', legend_title = 'EMB Trust Level')
po_plt # output
####################### Party Analysis (Figure 5) #########################

# Separate data frame for the party analysis
dat3 <- dat2

# Respondents are matched to "their" party agent: supporters of the two ruling
# parties to the `Jub/DPP` agent, supporters of the two opposition parties to
# the `ODM/MCP` agent. %in% is used so that a missing party is simply "no
# match" instead of propagating NA into the filter below.
ruling_parties <- c('Jubilee Alliance Party', 'Democratic Progressive Party (DPP)')
opposition_parties <- c('Orange Democratic Movement (ODM)', 'Malawi Congress Party (MCP)')
supports_ruling <- dat3$party %in% ruling_parties
supports_opposition <- dat3$party %in% opposition_parties

# copart_ag holds the integer code of the matching agent factor as text:
# '1' = first level (agent absent), '2' = second level (agent present).
# Respondents without one of the four parties keep the placeholder 'other'.
dat3$copart_ag <- 'other'
dat3$copart_ag[supports_ruling] <-
  as.character(as.integer(dat3$`Jub/DPP`[supports_ruling]))
dat3$copart_ag[supports_opposition] <-
  as.character(as.integer(dat3$`ODM/MCP`[supports_opposition]))

# Removal of observations with no partisan affiliation (and hence cannot be
# aligned to a particular kind of agent). This leaves ~ 1038 observations
# (accounting for the data dropped earlier). %in% keeps the filter free of NA
# so no all-NA rows are introduced.
dat3 <- dat3[!(dat3$copart_ag %in% 'other'), ]

###########################################################
# Binary co-partisan agent variable (is the respondent's own party agent
# present or absent?).
#
# Note: copart_ag stores the factor position counted from 1, so '1' is the
# baseline (co-partisan agent absent) and any other value means present.
dat3$copart_ag2 <- ifelse(
  dat3$copart_ag == '1',
  'Co-part agt absent',
  'Co-part agt present'
)

# Finer-grained agent variable that also distinguishes "no agents" and
# "two agents", based on the agt_num column of the data.
single_agent <- dat3$agt_num == 'one agent'
dat3$copart_ag3 <- ifelse(
  single_agent & dat3$copart_ag2 == 'Co-part agt present',
  "Copartisan",
  ifelse(
    single_agent & dat3$copart_ag2 == 'Co-part agt absent',
    "Non-copartisan",
    ifelse(dat3$agt_num == 'two agents', "Copart & Non-Copart", "No agents")
  )
)

# Error variables re-expressed relative to the respondent's own party.
# Note the different capitalisation of the two "crossed out" labels.
backs_opposition <- dat3$party == 'Orange Democratic Movement (ODM)' |
  dat3$party == 'Malawi Congress Party (MCP)'
backs_ruling <- dat3$party == 'Democratic Progressive Party (DPP)' |
  dat3$party == 'Jubilee Alliance Party'
odm_mcp_crossed <- dat3$`ODM/MCP Error` == 'Crossed out'
jub_dpp_crossed <- dat3$`Jub/DPP Error` == 'Crossed Out'

# Error on the tally line of the respondent's own party
dat3$copart_err <- ifelse(
  (backs_opposition & odm_mcp_crossed) | (backs_ruling & jub_dpp_crossed),
  "Error present",
  'Error absent'
)

# Error on the tally line of the other party
# (space in the string necessary for plotting reasons)
dat3$nocopart_err <- ifelse(
  (backs_opposition & jub_dpp_crossed) | (backs_ruling & odm_mcp_crossed),
  "Error present ",
  'Error absent '
)

# Factor versions of the partisan variables; the first level is the baseline
dat3$copart_ag2 <- factor(dat3$copart_ag2,
                          levels = c("Co-part agt absent", "Co-part agt present"))
dat3$copart_ag3 <- factor(dat3$copart_ag3,
                          levels = c("No agents", "Non-copartisan", "Copartisan",
                                     "Copart & Non-Copart"))
dat3$copart_err <- factor(dat3$copart_err,
                          levels = c("Error absent", "Error present"))
dat3$nocopart_err <- factor(dat3$nocopart_err,
                            levels = c("Error absent ", "Error present "))

# Copies under display names for easier plot labelling
dat3$`Copartisan Agent` <- dat3$copart_ag2
dat3$`Agents Present` <- dat3$copart_ag3
dat3$`Own Party Error` <- dat3$copart_err
dat3$`Other Party Error` <- dat3$nocopart_err

# Party analysis on the partisan subset (Figure 5)
sg_pp <- cj(
  dat3,
  chosen ~ `Presiding Officer` + `Agents Present` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  id = ~id
)

# Plotting outcomes
sg_pp_plt <- plot(sg_pp) +
  theme(legend.text = element_text(size = 9))

sg_pp_plt #output

###################### Observer Analysis (Figure 6) #######################

# Full model estimated separately by observer awareness (obs_kq)
sg_ob <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~obs_kq
)

obs_plt <- plot(sg_ob, group = 'obs_kq', legend_title = 'Observer Awareness')

obs_plt #output

################# Plot summarising forced-choice reasoning (Figure 7) ######################

# Mean and standard error of the mean of the four concern items, per treatment
# combination. NOTE(review): mean()/sd() are called without na.rm, so a missing
# answer makes the whole cell NA -- confirm the items are complete.
df_summary <- dat2 %>%
  select(trt, tally_chg2, unprt_int2, p_excess2, po_prs2) %>%
  group_by(trt) %>%
  summarise(
    mean_tally_chg2 = mean(tally_chg2),
    sem_tally_chg2 = sd(tally_chg2) / sqrt(length(tally_chg2)),
    mean_unprt_int2 = mean(unprt_int2),
    sem_unprt_int2 = sd(unprt_int2) / sqrt(length(unprt_int2)),
    mean_p_excess2 = mean(p_excess2),
    sem_p_excess2 = sd(p_excess2) / sqrt(length(p_excess2)),
    mean_po_prs2 = mean(po_prs2),
    sem_po_prs2 = sd(po_prs2) / sqrt(length(po_prs2)))

# Reshape to long format: one row per treatment combination and concern, with
# the columns `mean` and `sem`. The column names are split with a pattern on
# the statistic prefix only; the item names themselves contain "_", so
# splitting on "_" yields more pieces than there are target columns.
dat4 <- df_summary %>%
  pivot_longer(
    cols = starts_with(c("mean_", "sem_")),
    names_to = c(".value", "concern"),
    names_pattern = "^(mean|sem)_(.+)$")

# Explicit item -> legend label mapping, so the legend does not depend on the
# alphabetical order of the item names.
concern_labels <- c(
  p_excess2  = 'Excess Party Influence',
  po_prs2    = 'Official Pressured',
  tally_chg2 = 'Tally Change Threat',
  unprt_int2 = 'Unprotected Party Interest'
)
dat4$concern <- factor(
  dat4$concern,
  levels = names(concern_labels),
  labels = unname(concern_labels)
)

# Creating the plot: point = mean, range = mean +/- one standard error,
# concerns dodged side by side within each treatment combination
pt <- ggplot(dat4, aes(x=trt, y=mean, group=concern, color=concern, shape=concern)) +
  geom_pointrange(aes(ymin= mean-sem, ymax= mean+sem),
                  position = position_dodge(.5)) +
  labs(colour = "Concern", shape = 'Concern') + xlab('Treatment Combinations') +
  ylab('Average level of agreement on miconduct concern (5 = Strongly Agree)')

# Cleaning up axes, grid and legend (legend labels come from the factor levels)
pt <- pt +
  theme(axis.text.x = element_text(size = 11)) +
  theme(axis.text.y = element_text(size = 10)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank())+
  theme(axis.line = element_line(color = 'black')) +
  theme(panel.grid.major.x = element_line(color = "lightgrey", linetype = "dashed"))+
  theme(legend.key = element_rect(fill = "white")) +
  theme(legend.position = "bottom") +
  theme(legend.text = element_text(size = 10))

pt #output
########################################## Supplementary Appendices ##############################################

##################### Power Analyses #####################

# Vectorised version of the power calculator function from the cjpowR package
cjpowr_amce_vec <- Vectorize(cjpowr_amce)

# Runs the power calculator over a grid of benchmark AMCEs (0.05, 0.2, 0.35)
# and sample sizes (0 to 1000 in steps of 10) for an attribute with `n_levels`
# feature levels at alpha = 0.05.
# Returns a data frame with one row per grid cell, an `AMCE` factor (benchmark
# effects in reverse order of appearance) and a `group` column holding
# `group_label`, which is used for faceting.
power_grid <- function(n_levels, group_label) {
  grid <- expand.grid(amce = c(0.05, 0.2, 0.35), n = seq(0, 1000, by = 10))
  res <- t(cjpowr_amce_vec(amce = grid$amce, n = grid$n,
                           levels = n_levels, alpha = 0.05))
  res <- data.frame(res)
  res[] <- lapply(res, unlist)  # the transposed result holds list columns
  res$AMCE <- factor(res$amce, levels = rev(unique(res$amce)))
  res$group <- group_label
  res
}

# Power and type-S error for attributes with two and with four feature levels
df_2 <- power_grid(2, '2 Levels')
df_4 <- power_grid(4, '4 Levels')

# Compilation
df_all <- rbind(df_2, df_4)

# Plotting: shared base plot plus one line layer per quantity
p0 <- ggplot(data = df_all)
p1 <- geom_line(aes(x = n, y = power, linetype = AMCE))
p2 <- geom_line(aes(x = n, y = type_s, linetype = AMCE))

# The plot for statistical power specifically. The grey line marks 80% power.
# NOTE(review): the blue line at n = 253 is presumably the study's per-group
# sample size -- confirm.
out2 <- p0 + p1 + geom_hline(yintercept = 0.8, color = 'grey60', linewidth = 1) +
  geom_vline(xintercept = 253, color = 'blue') +
  theme_classic() + labs(x = 'Sample Size', y = 'Power') +
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(0.8), labels = c('80%'), limits = c(0,1)) +
  facet_grid(group ~.)

out2 #output

# The plot of type-S error specifically
out3 <- p0 + p2 +
  theme_classic() + labs(x = 'Sample Size', y = 'Type S-Error') +
  scale_y_continuous(limits = c(0,0.5)) +
  geom_vline(xintercept = 253, color = 'blue') +
  facet_grid(group ~.)

out3 #output

# The combined plot in the appendix (patchwork)
out_all <- out2 + out3
out_all #output

############## Sample Comparison to AfroBarometer (Figures A3, A4, A5, A6) ##############

# The original manuscript plots were created in Python and are replicated here
# in R so that all of the analysis code is in this script (hence the different
# plot aesthetics).
# Note: there may be slight differences in the demographic comparisons once the
# duplicate observations are dropped.
#
# In every comparison below the AfroBarometer percentages are typed in by hand
# and attached by position: they must be in the same order as the rows of the
# corresponding count() result (sorted categories, followed by any rows that
# were appended), and mutate() fails if the lengths differ.
# NOTE(review): the denominators 840 (Malawi) and 1500 (Kenya) are hard-coded,
# presumably the country sample sizes before duplicates were dropped -- confirm.

age_bins <- c(17, 25, 35, 45, 55, 65, 100)
age_labels <- c('18-25', '26-35', '36-45', '46-55', '56-65', '66+')
dat2$age_group <- cut(dat2$age, breaks = age_bins, labels = age_labels, include.lowest = FALSE)

# Malawi Age Comparison (Figure A3 - top image)
age_stat_mw <- dat2 %>%
  filter(country == 'malawi') %>%
  count(age_group) %>%
  mutate(Qualtrics = (n / 840) * 100)

# AfroBarometer Data from Malawi Round 9, Summary of Results report
age_plt_mw <- age_stat_mw %>%
  mutate(AfroBarometer = c(33.6, 23.4, 19.7, 11.4, 6.0)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  ggplot(aes(x = age_group, y = Value, fill = survey)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Age groups", y = "Sample Percentage", title = "Malawi Age Comparison") +
  theme_minimal()

age_plt_mw #output

# Malawi Party Comparison (Figure A3 - bottom image)
party_stat_mw <- dat2 %>%
  filter(country == 'malawi') %>%
  count(party) %>%
  mutate(Qualtrics = (n / 840) * 100)

# AfroBarometer Data from Malawi Round 9, Summary of Results report
party_plt_mw <- party_stat_mw %>%
  mutate(AfroBarometer = c(28.1, 22.2, 39.8, 0.3, 3.7, 5.4)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  # factor levels fix the left-to-right order of the bars
  mutate(party = factor(party, levels = c('Not close to any of the above',
                                          'United Transformation Movement (UTM)', 'Malawi Congress Party (MCP)', 'Democratic Progressive Party (DPP)',
                                          'Other','United Democratic Front (UDF)'))) %>%
  ggplot(aes(x = party, y = Value, fill = survey)) + geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Political Parties", y = "Sample Percentage", title = "Malawi Party Comparison") +
  theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1))

party_plt_mw #output

# Malawi Education Comparison (Figure A4)
educ_stat_mw <- dat2 %>%
  filter(country == 'malawi') %>%
  count(educ) %>%
  mutate(Qualtrics = (n / 840) * 100)

# Rows for the lower levels of schooling that do not occur in the Qualtrics
# sample (count and percentage set to zero)
ed_mw_add <- data.frame(
  educ = c('No Formal Schooling', 'Some primary schooling', 'Primary school completed'),
  n = c(0,0,0),
  Qualtrics = c(0,0,0))
# Attach to df
educ_stat_mw <- rbind(educ_stat_mw, ed_mw_add)

# AfroBarometer Data from Malawi Round 9, Summary of Results report
# NOTE(review): eight AfroBarometer values are attached but only seven
# education levels are listed below; an educ value not in the list becomes NA
# on the x axis -- confirm which category is meant.
educ_plt_mw <- educ_stat_mw %>%
  mutate(AfroBarometer = c(2.9, 13.2, 16.0, 0.6, 0.7, 7.1,47.9,11.6)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  mutate(educ = factor(educ, levels = c('No Formal Schooling', 'Some primary schooling', 'Primary school completed'
                                        ,'Some secondary school','Secondary school completed',
                                        'Post-secondary training but not at a university','University completed'))) %>%
  ggplot(aes(x = educ, y = Value, fill = survey)) + geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Education Levels", y = "Sample Percentage", title = "Malawi Education Comparison") +
  theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

educ_plt_mw #output


# Kenya Age Comparison (Figure A5 - top image)
# In every comparison below the AfroBarometer percentages are attached by
# position and must follow the row order of the count() result.
# NOTE(review): the denominator 1500 is hard-coded, presumably the Kenyan
# sample size before duplicates were dropped -- confirm.
age_stat_ky <- dat2 %>%
  filter(country == 'kenya') %>%
  count(age_group) %>%
  mutate(Qualtrics = (n / 1500) * 100)

# AfroBarometer Data from Kenya Round 8, Summary of Results report
age_plt_ky <- age_stat_ky %>%
  mutate(AfroBarometer = c(27.8, 28.7, 20.4, 11.1, 7.4)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  ggplot(aes(x = age_group, y = Value, fill = survey)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Age groups", y = "Sample Percentage", title = "Kenya Age Comparison") +
  theme_minimal()

age_plt_ky #output

# Kenya Party Comparison (Figure A5 - bottom image)
party_stat_ky <- dat2 %>%
  filter(country == 'kenya') %>%
  count(party) %>%
  mutate(Qualtrics = (n / 1500) * 100) %>%
  filter(!(party == "Don't Know" | party == "Prefer not to say"))

# Re-attaching "Don't Know" and "Prefer not to say" as a single row.
# NOTE(review): the count of 288 is hard-coded rather than taken from the two
# rows removed above, so it does not reflect the dropped duplicates -- confirm.
new_row <- data.frame(
  party = "Not Applicable",
  n = 288,
  Qualtrics = (288/1500)*100)

# Attaching row
party_stat_ky <- rbind(party_stat_ky, new_row)

# AfroBarometer Data from Kenya Round 8, Summary of Results report
party_plt_ky <- party_stat_ky %>%
  mutate(AfroBarometer = c(0.7, 0.7, 25.5, 0.4, 0.2, 0.3, 17.1, 1,0.1, 1.6, 51.3)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  # factor levels fix the left-to-right order of the bars
  mutate(party = factor(party, levels = c('Orange Democratic Movement (ODM)','Jubilee Alliance Party',
                                          'Other','Amani National Congress', 'Wiper Democratic Movement',
                                          'Maendeleo Chap Chap', 'KANU', 'Ford Kenya', 'Thirdway Alliance',
                                          'NARC - Kenya', 'Not Applicable'))) %>%
  ggplot(aes(x = party, y = Value, fill = survey)) + geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Political Parties", y = "Sample Percentage", title = "Kenya Party Comparison") +
  theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1))

party_plt_ky #output

# Kenya Education Comparison (Figure A6)
educ_stat_ky <- dat2 %>%
  filter(country == 'kenya') %>%
  count(educ) %>%
  mutate(Qualtrics = (n / 1500) * 100)

# Again adding the rows for the lower levels of schooling that do not occur in
# the Qualtrics sample (ed_mw_add is the zero-count table built for Malawi)
educ_stat_ky <- rbind(educ_stat_ky, ed_mw_add)

# AfroBarometer Data from Kenya Round 8, Summary of Results report
# NOTE(review): eight AfroBarometer values are attached but only seven
# education levels are listed below; an educ value not in the list becomes NA
# on the x axis -- confirm which category is meant.
educ_plt_ky <- educ_stat_ky %>%
  mutate(AfroBarometer = c(14.8, 23.2, 11.7, 3.0, 5.4, 3.7,13.9, 22.1)) %>%
  pivot_longer(cols = c(Qualtrics, AfroBarometer),
               names_to = "survey", values_to = "Value") %>%
  mutate(survey = factor(survey, levels = c('Qualtrics', 'AfroBarometer'))) %>%
  mutate(educ = factor(educ, levels = c('No Formal Schooling', 'Some primary schooling', 'Primary school completed'
                                        ,'Some secondary school','Secondary school completed',
                                        'Post-secondary training but not at a university','University completed'))) %>%
  ggplot(aes(x = educ, y = Value, fill = survey)) + geom_col(position = "dodge") +
  scale_fill_manual(values = c("Qualtrics" = "skyblue", "AfroBarometer" = "orange"),
                    breaks = c("Qualtrics", "AfroBarometer")) +
  labs(x = "Education Levels", y = "Sample Percentage", title = "Kenya Education Comparison") +
  theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

educ_plt_ky #output


################ Clean vs. Error Analysis (Figure A8) #####################
# Monitor features only, estimated separately for the err_typ (clean/errors)
# categories
sg_err <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers`,
  id = ~id,
  by = ~err_typ
)

# Plotting; the legend entries are relabelled and a missing group is not shown
err_plt <- plot(sg_err, group = 'err_typ', legend_title = 'Error Presence') +
  scale_colour_discrete(labels = c('No Errors', 'Errors'), na.translate = FALSE) +
  theme(legend.text = element_text(size = 11))

err_plt #output

################## Vote Difference Analysis (Figure A11) ##################

# This analysis uses the amended vote_df2 (for the previous (incorrect)
# submission output use 'vote_df' in 'by = ' and 'group = ')
sg_vd <- cj(dat2, chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` + `Jub/DPP Error` + `ODM/MCP Error`,
            id = ~id, by = ~ vote_df2)

# The legend labels are matched to the factor levels by name, so 'small' is
# always shown as 'Smaller' and 'large' as 'Larger'. Unnamed labels are applied
# by position, and vote_df2 is created with levels c("small", "large"), so a
# positional c('Larger', 'Smaller') can end up attached to the wrong group.
diff_plt <- plot(sg_vd, group = 'vote_df2', legend_title = 'Inital Vote Difference') +
  scale_colour_discrete(labels = c(small = 'Smaller', large = 'Larger'),
                        na.translate = FALSE)

diff_plt #output
################ Clean Forms only Analysis (Figure A12) #####################

# Subset clean forms only. subset() drops rows whose err_typ is missing;
# indexing with a logical vector that contains NA would instead insert
# all-NA rows into the data.
dat5 <- subset(dat2, err_typ == 'clean')

# Analysis: monitor features only
sg_clean <- cj(dat5, chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers`, id = ~id)
clean_plt <- plot(sg_clean)

clean_plt # output

######################### Country Comparisons #########################

########### Presiding officer analysis by country (Figure A13) ##############
# Malawi: full model by EMB trust, non-response groups removed
sg_po_mw <- cj(
  subset(dat2, country == 'malawi'),
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~trust_EMB
)
sg_po_mw <- sg_po_mw[sg_po_mw$BY != "Not sure" & sg_po_mw$BY != "Rather not say", ]

# Left panel: the legend is hidden because the right panel carries it
po_plt_mw <- plot(sg_po_mw, group = 'trust_EMB', legend_title = 'Trust') +
  ggtitle("Malawi") +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5))

# Kenya: same specification
sg_po_ky <- cj(
  subset(dat2, country == 'kenya'),
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~trust_EMB
)
sg_po_ky <- sg_po_ky[sg_po_ky$BY != "Not sure" & sg_po_ky$BY != "Rather not say", ]

po_plt_ky <- plot(sg_po_ky, group = 'trust_EMB', legend_title = 'Trust') +
  ggtitle("Kenya") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"))

# Combined plot (patchwork places the two panels side by side)
po_plt_country <- po_plt_mw + po_plt_ky

po_plt_country #output

############# Political Party Analysis by country (Figure A14) ################

# Party model on the partisan subset (dat3), estimated per country
# Malawi
sg_pp2_mw <- cj(
  subset(dat3, country == 'malawi'),
  chosen ~ `Presiding Officer` + `Agents Present` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  id = ~id
)
# Kenya
sg_pp2_ky <- cj(
  subset(dat3, country == 'kenya'),
  chosen ~ `Presiding Officer` + `Agents Present` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  id = ~id
)

# Left panel without legend, right panel with legend
pp_mw_plt <- plot(sg_pp2_mw) +
  ggtitle("Malawi") +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5))

pp_ky_plt <- plot(sg_pp2_ky) +
  ggtitle("Kenya") +
  theme(legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"),
        plot.title = element_text(hjust = 0.5))

# Combined plot
pp_by_country <- pp_mw_plt + pp_ky_plt

pp_by_country

################# Observer analysis by country (Figure A15) ######################
# Malawi: full model by observer awareness
sg_ob_mw <- cj(
  subset(dat2, country == 'malawi'),
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~obs_kq
)

obs_plt_mw <- plot(sg_ob_mw, group = 'obs_kq', legend_title = 'Observer Awareness') +
  ggtitle("Malawi") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "none")

# Kenya: same specification
sg_ob_ky <- cj(
  subset(dat2, country == 'kenya'),
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~obs_kq
)

obs_plt_ky <- plot(sg_ob_ky, group = 'obs_kq', legend_title = 'Observer Awareness') +
  ggtitle("Kenya") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"))

# Combined plot
obs_by_country <- obs_plt_mw + obs_plt_ky

obs_by_country #output

############# Marginal Mean and AMCE comparisons ##############

############## Presiding Officers (Figure A16)  ###############
# AMCEs by EMB trust (default estimate), non-response groups removed
sg_po <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~trust_EMB
)
sg_po <- sg_po[sg_po$BY != "Not sure" & sg_po$BY != "Rather not say", ]

po_plt_comp <- plot(sg_po, group = 'trust_EMB') +
  theme(legend.position = "none")

# Marginal means for the same specification
mm_po <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  estimate = 'mm',
  id = ~id,
  by = ~trust_EMB
)
mm_po <- mm_po[mm_po$BY != "Not sure" & mm_po$BY != "Rather not say", ]

# Reference line at 0.5 for the marginal means
mm_po_plt <- plot(mm_po, group = 'trust_EMB', vline = 0.5,
                  legend_title = 'EMB Trust Level') +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"))

# Final plot: AMCEs on the left, marginal means on the right
po_mm_amce <- po_plt_comp + mm_po_plt

po_mm_amce #output

############ Political Party Agent Analysis (Figure A17) ############
# AMCEs on the partisan subset
sg_pp <- cj(
  dat3,
  chosen ~ `Presiding Officer` + `Agents Present` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  id = ~id
)
sg_pp_comp <- plot(sg_pp) +
  theme(legend.position = "none")

# Marginal means for the same specification
sg_pp2_mm <- cj(
  dat3,
  chosen ~ `Presiding Officer` + `Agents Present` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  estimate = "mm",
  id = ~id
)

mm_pp2_plt <- plot(sg_pp2_mm, vline = 0.5) +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"))

# AMCEs on the left, marginal means on the right
pp_mm_amce <- sg_pp_comp + mm_pp2_plt

pp_mm_amce #output

############# Domestic Observer (Figure A18) #############
# AMCEs by observer awareness
sg_ob <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~obs_kq
)

# Marginal means for the same specification
mm_sg_ob <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  estimate = "mm",
  id = ~id,
  by = ~obs_kq
)

# Plot creation: legend only on the marginal-means panel
obs_plt_comp <- plot(sg_ob, group = 'obs_kq', legend_title = 'Observer Awareness') +
  theme(legend.position = "none")

mm_obs_plt <- plot(mm_sg_ob, group = 'obs_kq', legend_title = 'Observer Awareness',
                   vline = 0.5) +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "right",
        legend.box = "vertical",
        legend.margin = margin(0),
        legend.spacing = unit(0.5, "lines"))

# Final plot
obs_mm_amce <- obs_plt_comp + mm_obs_plt

obs_mm_amce #output

################ Further Partisan Analyses (Figure A19 and A20) ####################

# Model that only distinguishes whether a co-partisan agent is present or absent
sg_pp2 <- cj(
  dat3,
  chosen ~ `Presiding Officer` + `Copartisan Agent` + `Observers` +
    `Own Party Error` + `Other Party Error`,
  id = ~id
)
sg_pp1_plt <- plot(sg_pp2) +
  theme(legend.text = element_text(size = 9))
sg_pp1_plt #output

# Full model estimated separately for each respondent party group (party3)
sg_pp_other <- cj(
  dat2,
  chosen ~ `Presiding Officer` + `Jub/DPP` + `ODM/MCP` + `Observers` +
    `Jub/DPP Error` + `ODM/MCP Error`,
  id = ~id,
  by = ~party3
)

sg_pp_other_plt <- plot(sg_pp_other, group = 'party3',
                        legend_title = 'Party Affliation') +
  theme(legend.text = element_text(size = 11))

sg_pp_other_plt #output

################ ACIE Results (Figure A21) #####################
# Interaction plot: effect of an own-party error depending on whether a
# co-partisan agent is present or absent.
# 'copart_err' holds the same values as the `Own Party Error` column used in
# the appendices; it is that column under its original name.

# AMCE of the error within each agent condition ...
amces_by_agent <- cj(dat3, chosen ~ copart_err,
                     id = ~id, estimate = "amce", by = ~copart_ag2)
# ... and the difference between those AMCEs
diff_amces <- cj(dat3, chosen ~ copart_err,
                 id = ~id, estimate = "amce_differences", by = ~copart_ag2)

# Both sets of estimates stacked and shown in one panel per BY value
acie_estimates <- rbind(amces_by_agent, diff_amces)
error_agent_int <- plot(acie_estimates) +
  ggplot2::facet_wrap(~BY, ncol = 3L)

error_agent_int #output

################ Winner-Loser Analysis (Figures A22 and A23) #####################
# The winner-loser allocation depends in part on the vote_df2 variable. The
# corrected version (vote_df2) is used here. To get the previous winner-loser
# plots use the old vote_df variable in the dataset.

# Placeholder: every respondent starts as an electoral loser
dat2$winlos <- 'loser'

# In Malawi, the initial vote count numbers were such that MCP was the
# electoral winner, so MCP supporters are the winners:
dat2$winlos <- ifelse(dat2$country == 'malawi' & dat2$party == 'Malawi Congress Party (MCP)', 'winner', dat2$winlos)

# In Kenya the allocation of the winner was different. The initial vote count
# numbers were such that the Jubilee Alliance Party won whenever the vote
# difference on the form was small, and lost when it was large. Hence two
# lines that designate the winner conditional on party and vote_df2.
dat2$winlos <- ifelse(dat2$country == 'kenya' & dat2$party == 'Jubilee Alliance Party' & dat2$vote_df2 == 'small', 'winner', dat2$winlos)
dat2$winlos <- ifelse(dat2$country == 'kenya' & dat2$party == 'Orange Democratic Movement (ODM)' & dat2$vote_df2 == 'large', 'winner', dat2$winlos)

# Extract the respondents with some partisan affiliation. This leaves
# 'Jub/DPP', 'ODM/MCP' and 'other' (1704 obs.). subset() drops rows with a
# missing party; indexing with a logical vector that contains NA would instead
# insert all-NA rows into the data.
dat6 <- subset(
  dat2,
  !(party == "Not close to any of the above" |
      party == "Prefer not to say" |
      party == "Don't Know")
)

# Turn winlos into a factor variable with 'loser' as the baseline
dat6$winlos <- factor(dat6$winlos, levels = c('loser', 'winner'))

############################################################
# Restructuring the party agent data to fit the winner-loser framework.
# Which party is the electoral winner follows the coding used for `winlos`:
# MCP in Malawi; in Kenya ODM when the vote difference is large and Jubilee
# when it is small.
in_malawi <- dat6$country == 'malawi'
in_kenya <- dat6$country == 'kenya'
odm_mcp_agent <- dat6$`ODM/MCP` == ' Agent present '
jub_dpp_agent <- dat6$`Jub/DPP` == 'Agent present'
gap_large <- dat6$vote_df2 == 'large'
gap_small <- dat6$vote_df2 == 'small'

# Is the electoral winner's party agent present?
dat6$win_pres <- ifelse(
  (in_malawi & odm_mcp_agent) |
    (in_kenya & odm_mcp_agent & gap_large) |
    (in_kenya & jub_dpp_agent & gap_small),
  'Agent present',
  'Agent absent'
)

# Is the electoral loser's party agent present?
# (labels carry a trailing space to keep them distinct from the winner labels)
dat6$los_pres <- ifelse(
  (in_malawi & jub_dpp_agent) |
    (in_kenya & odm_mcp_agent & gap_small) |
    (in_kenya & jub_dpp_agent & gap_large),
  'Agent present ',
  'Agent absent '
)

# Turning the new agent variables into factors (baseline: agent absent)
dat6$win_pres <- factor(dat6$win_pres, levels = c('Agent absent', 'Agent present'))
dat6$los_pres <- factor(dat6$los_pres, levels = c('Agent absent ', 'Agent present '))

# Copies under display names for pretty plotting
dat6$`Winner's Agt` <- dat6$win_pres
dat6$`Loser's Agt` <- dat6$los_pres


# Winner-loser analysis restricted to respondents of the major parties
dat7 <- dat6[dat6$party3 != "other", ]

# AMCE estimation by winner/loser status
winlos_partial <- cj(
  dat7,
  chosen ~ `Presiding Officer` + `Winner's Agt` + `Loser's Agt` + `Observers`,
  id = ~id,
  estimate = "amce",
  by = ~winlos
)

# Plotting (Figure A22)
winlos_partial_plt <- plot(winlos_partial, group = "winlos",
                           legend_title = 'Tally Winner/Loser')
winlos_partial_plt #output


# AMCE estimation on all respondents with a partisan affiliation
winlos_analysis <- cj(
  dat6,
  chosen ~ `Presiding Officer` + `Winner's Agt` + `Loser's Agt` + `Observers`,
  id = ~id,
  estimate = "amce",
  by = ~winlos
)

# Plotting (Figure A23)
winlos_full_plt <- plot(winlos_analysis, group = "winlos",
                        legend_title = 'Tally Winner/Loser')
winlos_full_plt #output

###################################################### END OF REPLICATION FILE ##################################################



